{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "# TensorRT Engine Report Card\n",
    "\n",
    "Use this Jupyter worksheet to get a quick overview of the structure and characteristics of a TensorRT Engine plan."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "import IPython\n",
    "from ipywidgets import widgets\n",
    "from trex import *\n",
    "from trex.notebook import *\n",
    "from trex.report_card import *\n",
    "\n",
    "# Choose an engine file to load.\n",
    "engine_name = \"../tests/inputs/mobilenet.qat.onnx.engine\"\n",
    "engine_name = \"../tests/inputs/mobilenet_v2_residuals.qat.onnx.engine\"\n",
    "set_wide_display()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Load JSON Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "plan = EnginePlan(f\"{engine_name}.graph.json\", f\"{engine_name}.profile.json\", f\"{engine_name}.profile.metadata.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Render Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "report_card_draw_plan_graph_extended(plan, engine_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "<html><div style=\"text-align:center;background:#76b900;padding:20px;color:#ffffff;font-size:2em;\">Plan Summary</div></html>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "# \"Average time\" refers to the sum of the layer latencies, when profiling layers separately\n",
    "# \"Latency\" refers to the [min, max, mean, median, 99% percentile] of the engine latency measurements, when timing the engine w/o profiling layers.\n",
    "plan.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "layer_latency_sunburst(plan.df, \"Layers Latencies (%)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_table_view(plan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Timings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_engine_timings(timing_json_file= f\"{engine_name}.timing.json\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_perf_overview_widget(plan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Memory Footprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_memory_footprint_widget(plan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "<html><div style=\"text-align:center;background:#76b900;padding:20px;color:#ffffff;font-size:2em;\">Convolutions</div></html>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "convs = plan.get_layers_by_type('Convolution')\n",
    "report_card_convolutions_overview_widget(convs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tactics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "latency_vs_prec_per_conv = partial(\n",
    "    plotting.plotly_bar2,\n",
    "    convs,\n",
    "    values_col='latency.pct_time',\n",
    "    names_col='Name',\n",
    "    color='tactic')\n",
    "\n",
    "latency_vs_prec_per_conv(\"Latency per Layer (color=Tactics)\")\n",
    "\n",
    "tactic_cnt = group_count(plan.df, 'tactic')\n",
    "display_df(tactic_cnt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Experimental \n",
    "\n",
    "The data below are based on partial information and are provided here just for exploration and fun.\n",
    "\n",
    "Examples of simplifying assumptions:\n",
    "* Convolutions and matrix-multiplications are implemented using implicit-gemm. In practice, various algorithms might be used, which will affect the arithmetic-intensity.\n",
    "* Input/output activations (feature-maps) and parameters (weights and other constants) are read once, and from device global memory. In practice, it is likely that activations will be resident in the L2 cache, which has a much higher bandwidth compared to global memory. Matrix multiplication is performed using activation tiles which are read (reused) multiple times. These reads are ignored in the calculation of the arithmetic-intensity.\n",
    "* Compute-efficiency and memory-efficiency are calculated by dividing the MACs or bytes by the total time per layer, instead of dividing by the time spent only on compute or only on memory access.\n",
    "* Some convolutions have fued operations (e.g. SiLU activation) which are currently ignored in the calculation of the number of operations.\n",
    "\n",
    "\n",
    "Performance optimization references:\n",
    "* [Nvidia DL Peformance Background: Understand Performance](https://docs.nvidia.com/deeplearning/performance/dl-performance-gpu-background/index.html#understand-perf)\n",
    "* [Nvidia DL Peformance: GEMM](https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#math-mem)\n",
    "* [CUDA Programming: Instruction Throughput](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#maximize-instruction-throughput)\n",
    "* [TensorRT Developer Guide: Performance Best Practices](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#performance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_gemm_MNK(plan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "report_card_gemm_MNK_scatter(plan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_efficiency_vs_latency_3d(plan)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_perf_scatter(plan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "<html><div style=\"text-align:center;background:#76b900;padding:20px;color:#ffffff;font-size:2em;\">Layer Lint Utility</div></html>\n",
    "\n",
    "Linting functions perform static analysis of the plan to flag possible performance hazards.<br>\n",
    "See TensorRT's [Performance Best Prcatices](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#performance) for more information.\n",
    "\n",
    "<b>The linting is in an early experimental stage and may be imcomplete or erronous.<b>\n",
    "\n",
    "## Convolution Lint\n",
    "\n",
    "Ideally all Float16 and INT8 convolutions are accelerated on Tensor Cores, so `ConvLinter` uses heuristics on the kernel name to determine if the kernel is accelerated on TCs.  This method is probably incorrect is some cases and needs more investment."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "display_df(ConvLinter(plan).lint())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Reformat Lint\n",
    "\n",
    "Reformat layers copy their input to their output while making some changes to the data.<br>\n",
    "A Reformat layer may change the data layout (e.g. from NCHW to NC32HW) or perform data-type conversion (e.g. from float32 to INT8).\n",
    "\n",
    "A Reformat layer is added by to the engine graph by TensorRT's graph-optimizer for one of several reasons, which are indicated by the `attr.origin` field.\n",
    "* REFORMAT: type or layout conversion.\n",
    "* SLICE: slice layer output conversion.\n",
    "* CONCAT: concat layer input conversion.\n",
    "* IDENTITY: identity layer conversion.\n",
    "* NOOP: no-op layer conversion (e.g no-op shuffle or scale).\n",
    "* NVMOPT: injected by the NVM region optimizer.\n",
    "* QDQ: o << \"QDQ\"; break;\n",
    "\n",
    "\n",
    "Reformat layers that perform data-type conversion from float32/float16 to INT8, or vice-versa, may indicate poorly placed Q/DQ layers in a QAT network. <br>\n",
    "These are Q/DQ layers which could not be fused with another layer in the engine graph and may be quite costly in latency."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "report_card_reformat_overview(plan);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "display_df(ReformatLinter(plan).lint())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Slice Linter\n",
    "\n",
    "Slice layers that perform data-type conversion may indicate an optimization opportunity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "display_df(SliceLinter(plan).lint())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Q/DQ Linter\n",
    "\n",
    "Quantize/Dequenatize layers perform a copy with quantization/dequantization.<br>\n",
    "Unfused Q/DQ layers (\"dangling Q/DQ\") are very wasteful and usually indicate poorly placement of fake-quantization in the training model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "display_df(QDQLinter(plan).lint())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": false
       }
      }
     }
    }
   },
   "source": [
    "## Pointwise Layers\n",
    "\n",
    "Pointwise and Elementwise layers can be fused to create larger kernels.<br>\n",
    "Here you can explore how well these layers managed to fuse."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "outputs": [],
   "source": [
    "report_card_pointwise_lint(plan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "extensions": {
     "jupyter_dashboards": {
      "version": 1,
      "views": {
       "grid_default": {},
       "report_default": {
        "hidden": true
       }
      }
     }
    }
   },
   "source": [
    "<html><div style=\"text-align:center;background:#76b900;padding:20px;color:#ffffff;font-size:2em;\">Excel Summary</div></html>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate a default summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from trex.excel_summary import *\n",
    "\n",
    "summary = ExcelSummary(plan, path=\"default_summary.xlsx\")\n",
    "summary.generate_default_summary() # 'generate_default_summary' automatically saves the file"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Generate a customized summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "summary = ExcelSummary(plan, path=\"customized_summary.xlsx\")\n",
    "summary.add_dataframes({\"df\": plan.df})\n",
    "summary.add_images({\"trex_logo\": \"../images/trex_logo.png\"})\n",
    "summary.add_dataframes({\"clean_df\": clean_for_display(plan.df)})\n",
    "summary.save() # 'save' needs to be called to propagate the changes to the file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The context manager automatically saves the file\n",
    "with ExcelSummary(plan, path=\"customized_summary_with_manager.xlsx\") as summary:\n",
    "    summary.add_dataframes({\"df\": plan.df})\n",
    "    summary.add_images({\"trex_logo\": \"../images/trex_logo.png\"})\n",
    "    summary.add_dataframes({\"clean_df\": clean_for_display(plan.df)})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "extensions": {
   "jupyter_dashboards": {
    "activeView": "report_default",
    "version": 1,
    "views": {
     "grid_default": {
      "cellMargin": 10,
      "defaultCellHeight": 20,
      "maxColumns": 12,
      "name": "grid",
      "type": "grid"
     },
     "report_default": {
      "name": "report",
      "type": "report"
     }
    }
   }
  },
  "interpreter": {
   "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
